fileprefix = "20201221_"
ref_file_mm10 = "~/Documents/reference_data/mouse/ensembl/ensembl_gene_length_mm10.txt.gz"
# This is required to set the fonts of the paper plots to Arial
library(extrafont)
font_import(prompt=FALSE)
loadfonts()
library(RColorBrewer)
library(Seurat)
library(scater)
library(cowplot)
library(dplyr)
library(tidyr)
library(SoupX)
calc_tpm = function(dat, ref_file) {
mm_annot = read.table(ref_file, header=T, stringsAsFactors=F, sep="\t")
eff_length = mm_annot$eff_length[match(rownames(dat), mm_annot$mgi_symbol)]
eff_length[!is.finite(eff_length)] = 1
sce = SingleCellExperiment(assays=list(counts=as.matrix(dat), logcounts=log2(as.matrix(dat)+1)))
tpm = calculateTPM(sce, eff_length)
return(tpm)
}
theme_publication_plot = function(p, legend_title, legend_aes=4) {
p = p +
theme_cowplot() +
guides(fill = guide_legend(title=legend_title,
override.aes = list(size=legend_aes)),
colour = guide_legend(title=legend_title,
override.aes = list(size=legend_aes))) +
theme(axis.text = element_text(family="Arial", colour="black",size=10,face="plain"),
axis.title = element_text(family="Arial", colour="black",size=12,face="plain"),
strip.text.x = element_text(family="Arial", colour="black",size=12,face="plain", angle=90),
strip.text.y = element_text(family="Arial", colour="black",size=12,face="plain", angle=360),
strip.background = element_blank(),
panel.spacing.x = unit(1, "mm"),
panel.spacing.y = unit(3, "mm"),
plot.title = element_text(family="Arial", colour="black",size=12,face="plain",hjust = 0.5))
return(p)
}
samplenames = c("EA_WT_1", "EA_WT_2", "EA_NOTCH1_HOM_1", "EA_NOTCH1_HOM_2")
library_names = c("WT 1", "WT 2", "HOM KO 1", "HOM KO 2")
prefixes = c("c1_", "c2_", "s1_", "s2_")
expr_matrices = list()
celltypes_all = list()
for (i in 1:length(samplenames)) {
prefix = prefixes[i]
in_path = file.path("../cellranger/", samplenames[i])
celltypes = read.table(file.path("../identify_celltypes", samplenames[i], "celltypes_per_cell.txt"), header=T, stringsAsFactors=F)
sc = load10X(in_path)
out = sc$toc
colnames(out) = paste0(prefix, colnames(out))
celltypes$cell = paste0(prefix, celltypes$cell)
expr_matrices[[length(expr_matrices)+1]] = out
celltypes_all[[length(celltypes_all)+1]] = celltypes
}
stats = list()
for (i in 1:length(samplenames)) {
stats_sample = read.table(file.path("../cellranger/", samplenames[i], "metrics_summary.csv"), sep=",", header=T, stringsAsFactors=F)
stats_sample$library = samplenames[i]
stats[[samplenames[i]]] = stats_sample
}
stats = do.call(rbind, stats)
print(stats)
nFeature_RNA_min = 2500
nFeature_RNA_max = 6500
nCount_RNA_max = 55000
prop.mt_min = 0.03
prop.mt_max = 0.1
seu = list()
for (i in 1:length(expr_matrices)) {
seu[[i]] = CreateSeuratObject(counts = expr_matrices[[i]], min.cells = 0, min.features = 0)
mito.genes = rownames(GetAssayData(object=seu[[i]]))[grepl(pattern = "^MT-", x = toupper(rownames(GetAssayData(object=seu[[i]]))))]
percent.mito = Matrix::colSums(GetAssayData(object=seu[[i]], slot="counts")[mito.genes, ]) / Matrix::colSums(GetAssayData(object=seu[[i]], slot="counts"))
seu[[i]] = AddMetaData(object = seu[[i]], metadata = percent.mito, col.name = "prop.mt")
# plot prop.mt vs nFeature_RNA and vs nCount_RNA
p1 = ggplot(seu[[i]]@meta.data) + aes(x=prop.mt, y=nFeature_RNA) + geom_point(size=0.1, colour="red") + scale_y_log10() + theme_cowplot() + geom_vline(xintercept=c(prop.mt_min, prop.mt_max), linetype=2) + geom_hline(yintercept=c(nFeature_RNA_min, nFeature_RNA_max), linetype=2)
p2 = ggplot(seu[[i]]@meta.data) + aes(x=prop.mt, y=nCount_RNA) + geom_point(size=0.1, colour="blue") + scale_y_log10() + theme_cowplot() + geom_vline(xintercept=c(prop.mt_min, prop.mt_max), linetype=2) + geom_hline(yintercept=c(nCount_RNA_max), linetype=2)
print(plot_grid(p1, p2))
p1 = VlnPlot(seu[[i]], group.by="orig.ident", features = c( "nFeature_RNA"), ncol = 1, pt.size=0) +
geom_hline(yintercept = c(nFeature_RNA_min, nFeature_RNA_max), linetype=2) + theme(legend.position="none")
p2 = VlnPlot(seu[[i]], group.by="orig.ident", features = c( "nCount_RNA"), ncol = 1, pt.size=0) +
geom_hline(yintercept = c(nCount_RNA_max), linetype=2) + theme(legend.position="none")
p3 = VlnPlot(seu[[i]], group.by="orig.ident", features = c( "prop.mt"), ncol = 1, pt.size=0) + theme(legend.position="none") + geom_hline(yintercept = c(prop.mt_max, prop.mt_min), linetype=2)
# if (grepl("KO", samplenames[i], fixed=T)) {
# p3 = p3 + geom_hline(yintercept = c(0.1, 0.002), linetype=2)
# } else {
# p3 = p3 + geom_hline(yintercept = c(0.1, 0.01), linetype=2)
# }
print(plot_grid(p1, p2, p3, ncol=3))
}
First adjustments per library
seu = list()
for (i in 1:length(expr_matrices)) {
seu[[i]] = CreateSeuratObject(counts = expr_matrices[[i]], min.cells = 30, min.features = 2500)
mito.genes = rownames(GetAssayData(object=seu[[i]]))[grepl(pattern = "^MT-", x = toupper(rownames(GetAssayData(object=seu[[i]]))))]
percent.mito = Matrix::colSums(GetAssayData(object=seu[[i]], slot="counts")[mito.genes, ]) / Matrix::colSums(GetAssayData(object=seu[[i]], slot="counts"))
seu[[i]] = AddMetaData(object = seu[[i]], metadata = percent.mito, col.name = "prop.mt")
# Restrict proportion MT expression to remove unhappy cells
seu[[i]] = subset(x = seu[[i]], subset = prop.mt > 0.03 & prop.mt < 0.1)
# Remove potential doublets
seu[[i]] = subset(x = seu[[i]], subset = nFeature_RNA < 6500 & nCount_RNA < 55000)
seu[[i]] = SCTransform(seu[[i]], verbose = FALSE, vars.to.regress=c("prop.mt", "nFeature_RNA", "nCount_RNA"))
seu[[i]] = CellCycleScoring(seu[[i]], s.features = cc.genes$s.genes, g2m.features = cc.genes$g2m.genes, assay = 'SCT', set.ident = TRUE)
# Enable this when also adjusting for cell cycle
seu[[i]] = SCTransform(seu[[i]], verbose = FALSE, vars.to.regress=c("prop.mt", "nFeature_RNA", "nCount_RNA", "S.Score", "G2M.Score"))
seu[[i]] = AddMetaData(object=seu[[i]], metadata=factor(rep(library_names[i], length(seu[[i]]$nCount_RNA)), levels=library_names), col.name="library")
}
Combine the libraries with application of batch effect correction
# This is required for PrepSCTIntegration, an increase from the R default 512 to 5120 for this dataset
options(future.globals.maxSize=5120*1024^2)
seu_features = SelectIntegrationFeatures(object.list = seu, nfeatures = 3000)
seu = PrepSCTIntegration(object.list = seu, anchor.features = seu_features, verbose = FALSE)
seu = lapply(X = seu, FUN = RunPCA, verbose = FALSE, features = seu_features)
seu_anchors = FindIntegrationAnchors(object.list = seu, normalization.method = "SCT", anchor.features = seu_features, verbose = FALSE, reduction = "rpca")
seu_integrated = IntegrateData(anchorset = seu_anchors, normalization.method = "SCT", verbose = FALSE)
seu_integrated@meta.data$library = factor(seu_integrated@meta.data$library, levels=library_names)
Make some summary plots
seu_integrated = RunPCA(seu_integrated, verbose = FALSE)
VlnPlot(seu_integrated, group.by="orig.ident", features = c("nFeature_RNA", "nCount_RNA", "percent.mito"), ncol = 3, pt.size=0.01)
The following requested variables were not found: percent.mito
ElbowPlot(seu_integrated, ndims=50)
Typically you’d pick a point on the above curve where the tail starts to flatten. I’ve included a few more PCs here to make sure we’re capturing a bit more possibly relevant variation to increase separation between cell types. Went for 30 PCs here.
seu_integrated = RunUMAP(seu_integrated, dims = 1:30)
The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
This message will be shown once per session12:36:06 UMAP embedding parameters a = 0.9922 b = 1.112
12:36:06 Read 13111 rows and found 30 numeric columns
12:36:06 Using Annoy for neighbor search, n_neighbors = 30
12:36:06 Building Annoy index with metric = cosine, n_trees = 50
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
12:36:09 Writing NN index file to temp file /var/folders/bz/p63rv1754pv9v6rz33xgmy8sgyrrg3/T//RtmpsxGViF/fileb6712d3677f1
12:36:09 Searching Annoy index using 1 thread, search_k = 3000
12:36:14 Annoy recall = 100%
12:36:16 Commencing smooth kNN distance calibration using 1 thread
12:36:19 Initializing from normalized Laplacian + noise
12:36:20 Commencing optimization for 200 epochs, with 564502 positive edges
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
12:36:29 Optimization finished
UMAPPlot(seu_integrated)
p = UMAPPlot(seu_integrated,group.by="library")
p = theme_publication_plot(p, "Library")
print(p)
cowplot::save_plot(plot=p,
filename = paste0(fileprefix, "supplfig_umap_overlay_library.pdf"),
base_height=4,
base_width=6)
FeaturePlot(seu_integrated, features=c("Krt14", "Tgm3", "Krt4", "Lor"))
VlnPlot(seu_integrated, group.by="orig.ident", features = "prop.mt", ncol = 1, pt.size=0.01)
FeaturePlot(seu_integrated, features=c("nFeature_RNA", "nCount_RNA", "prop.mt"))
First annotate clusters with expression of a number of marker genes and pick a threshold per cluster. That data is then further down used to assign cell type labels
celltypes = do.call(rbind, celltypes_all)
tpm = calc_tpm(seu_integrated@assays[["RNA"]]@counts, ref_file_mm10)
# get tpm for marker genes per cluster
seu_integrated = FindNeighbors(seu_integrated, dims = 1:30)
Computing nearest neighbor graph
Computing SNN
seu_integrated = FindClusters(object = seu_integrated, resolution = 0.6, verbose=FALSE)
markers_all_cells = FindAllMarkers(seu_integrated, only.pos = TRUE, min.pct = 0.1, thresh.use = 0.25, verbose = F)
mdata = seu_integrated@meta.data
mdata$celltype = NA
inboth = intersect(gsub("-1", "", rownames(mdata)), celltypes$cell)
mdata$celltype[match(gsub("-1", "", rownames(mdata)), inboth)] = celltypes$classification[match(inboth, celltypes$cell)]
seu_integrated = AddMetaData(seu_integrated, mdata$celltype, "celltype")
UMAPPlot(seu_integrated, group.by="celltype")
cluster_classification = data.frame(clusterid=sort(unique(mdata$seurat_clusters)))
make_plot = function(mdata, tpm, gene_name) {
plot_dat = data.frame(cell=rownames(mdata), cluster=mdata$seurat_clusters, tpm=tpm[gene_name,rownames(mdata)])
plot_dat = plot_dat[is.finite(plot_dat$tpm),]
plot_dat_clusters = plot_dat %>% group_by(cluster) %>% summarise(n=n(), mean=mean(tpm), median=median(tpm))
p2 = ggplot(plot_dat_clusters) + aes(x=cluster, y=median, label=cluster) + geom_text() + theme_cowplot() + ggtitle(gene_name)
return(list(p=p2, plot_dat_clusters=plot_dat_clusters))
}
make_umap_plot = function(plot_dat, gene_name) {
plot_dat[, gene_name] = log(plot_dat[, gene_name])
gene_name_title = paste0(toupper(substr(gene_name, 1, 1)), substr(gene_name, 2, nchar(gene_name)))
return(ggplot(plot_dat) + aes_string(x="UMAP_1", y="UMAP_2", colour=gene_name) +
geom_point(size=0.25) + scale_colour_gradient(low="grey", high="red") +
theme_cowplot() + ggtitle(gene_name_title))
}
plot_data = as.data.frame(seu_integrated@reductions$umap@cell.embeddings)
# Keratinocytes
p1 = make_plot(mdata, tpm, "Krt14")
`summarise()` ungrouping output (override with `.groups` argument)
Registered S3 method overwritten by 'cli':
method from
print.boxx spatstat
cluster_classification$krt14 = p1$plot_dat_clusters$median > 50
plot_data$krt14 = tpm["Krt14", rownames(plot_data)]
p2 = make_plot(mdata, tpm, "Tgm3")
`summarise()` ungrouping output (override with `.groups` argument)
cluster_classification$tgm3 = p2$plot_dat_clusters$median > 5
plot_data$tgm3 = tpm["Tgm3", rownames(plot_data)]
p3 = make_plot(mdata, tpm, "Lor")
`summarise()` ungrouping output (override with `.groups` argument)
cluster_classification$lor = p3$plot_dat_clusters$median > 50
plot_data$lor = tpm["Lor", rownames(plot_data)]
p4 = make_plot(mdata, tpm, "Krt4")
`summarise()` ungrouping output (override with `.groups` argument)
# cluster_classification$tgm3 = plot_dat_clusters$median > 10
plot_data$krt4 = tpm["Krt4", rownames(plot_data)]
plot_grid(plotlist=list(p1$p, p2$p, p3$p, p4$p), ncol=2)
p = plot_grid(plotlist=list(theme_publication_plot(make_umap_plot(plot_data, "krt14"), "log(TPM)"),
theme_publication_plot(make_umap_plot(plot_data, "tgm3"), "log(TPM)"),
theme_publication_plot(make_umap_plot(plot_data, "lor"), "log(TPM)"),
theme_publication_plot(make_umap_plot(plot_data, "krt4"), "log(TPM)")), ncol=2, nrow=2)
print(p)
plot_grid(plotlist=list(make_umap_plot(plot_data, "krt14"),
make_umap_plot(plot_data, "krt4")), ncol=2, nrow=2)
plot_grid(plotlist=list(make_umap_plot(plot_data, "krt14"),
make_umap_plot(plot_data, "tgm3")), ncol=2, nrow=2)
cowplot::save_plot(plot=p,
filename = paste0(fileprefix, "supplfig_markers_keratinocyte.pdf"),
base_height=4,
base_width=6)
# Fibroblasts and endothelial cells
p1 = make_plot(mdata, tpm, "Col1a1")
`summarise()` ungrouping output (override with `.groups` argument)
cluster_classification$col1a1 = p1$plot_dat_clusters$median > 10
plot_data$col1a1 = tpm["Col1a1", rownames(plot_data)]
p2 = make_plot(mdata, tpm, "Pecam1")
`summarise()` ungrouping output (override with `.groups` argument)
cluster_classification$pecam1 = p2$plot_dat_clusters$median > 2
plot_data$pcam1 = tpm["Pecam1", rownames(plot_data)]
plot_grid(plotlist=list(p1$p, p2$p), ncol=2)
p = plot_grid(plotlist=list(theme_publication_plot(make_umap_plot(plot_data, "col1a1"), "log(TPM)"),
theme_publication_plot(make_umap_plot(plot_data, "pcam1"), "log(TPM)")), ncol=2, nrow=2)
print(p)
cowplot::save_plot(plot=p,
filename = paste0(fileprefix, "supplfig_markers_fibroblast_endothelial.pdf"),
base_height=4,
base_width=6)
# Immune cells
# B-cell marker
p1 = make_plot(mdata, tpm, "Cd83")
`summarise()` ungrouping output (override with `.groups` argument)
cluster_classification$cd83 = p1$plot_dat_clusters$median > 1
plot_data$cd83 = tpm["Cd83", rownames(plot_data)]
p2 = make_plot(mdata, tpm, "Cd84")
`summarise()` ungrouping output (override with `.groups` argument)
cluster_classification$cd84 = p2$plot_dat_clusters$median > 1
plot_data$cd84 = tpm["Cd84", rownames(plot_data)]
p3 = make_plot(mdata, tpm, "Cd86")
`summarise()` ungrouping output (override with `.groups` argument)
cluster_classification$cd86 = p3$plot_dat_clusters$median > 0.4
plot_data$cd86 = tpm["Cd86", rownames(plot_data)]
plot_grid(plotlist=list(p1$p, p2$p, p3$p), ncol=2)
plot_grid(plotlist=list(make_umap_plot(plot_data, "cd83"),
make_umap_plot(plot_data, "cd84"),
make_umap_plot(plot_data, "cd86")), ncol=2, nrow=2)
# T-cells
# t-cell marker
p1 = make_plot(mdata, tpm, "Trbc2")
`summarise()` ungrouping output (override with `.groups` argument)
cluster_classification$trbc2 = p1$plot_dat_clusters$median > 50
plot_data$trbc2 = tpm["Trbc2", rownames(plot_data)]
# not specific
p2 = make_plot(mdata, tpm, "Cd52")
`summarise()` ungrouping output (override with `.groups` argument)
cluster_classification$cd52 = p2$plot_dat_clusters$median > 50
plot_data$cd52 = tpm["Cd52", rownames(plot_data)]
# t-cell marker
p3 = make_plot(mdata, tpm, "Ptprc")
`summarise()` ungrouping output (override with `.groups` argument)
cluster_classification$ptprc = p3$plot_dat_clusters$median > 50
plot_data$ptprc = tpm["Ptprc", rownames(plot_data)]
plot_grid(plotlist=list(p1$p, p2$p, p3$p), ncol=2)
p = plot_grid(plotlist=list(theme_publication_plot(make_umap_plot(plot_data, "ptprc"), "log(TPM)"),
theme_publication_plot(make_umap_plot(plot_data, "cd52"), "log(TPM)")), ncol=2, nrow=2)
print(p)
cowplot::save_plot(plot=p,
filename = paste0(fileprefix, "supplfig_markers_immune.pdf"),
base_height=4,
base_width=6)
Handy plot with large cluster numbers
plot <- DimPlot(object = seu_integrated, reduction = "umap")
LabelClusters(plot = plot, id = 'ident', size=6)
Now assign cell type labels based on the above markers and make a plot
cluster_classification$celltype = NA
cluster_classification$celltype[cluster_classification$krt14 | cluster_classification$tgm3 | cluster_classification$lor] = "keratinocyte"
cluster_classification$celltype[cluster_classification$col1a1] = "fibroblast"
cluster_classification$celltype[cluster_classification$pecam1] = "endothelial"
cluster_classification$celltype[cluster_classification$cd83 | cluster_classification$cd84 | cluster_classification$cd86 | cluster_classification$cd52 | cluster_classification$trbc2] = "immune"
cluster_classification_table = cluster_classification$celltype
names(cluster_classification_table) = as.character(cluster_classification$clusterid)
seu_integrated = AddMetaData(seu_integrated, cluster_classification_table[as.character(seu_integrated@meta.data$seurat_clusters)], "celltype")
UMAPPlot(seu_integrated, group.by="celltype")
plot_dat = as.data.frame(seu_integrated@reductions[["umap"]]@cell.embeddings)
plot_dat$celltype = seu_integrated@meta.data$celltype
plot_dat$celltype = unlist(lapply(plot_dat$celltype, function(x) { paste(toupper(substr(x, 1, 1)), substr(x, 2, nchar(x)), sep="") }))
plot_dat$celltype_fctr = factor(plot_dat$celltype, levels=c("Keratinocyte", "Fibroblast", "Immune", "Endothelial"))
A number of purple coloured cells appear in the clusters of other cell types, looking at the above UMAP space. Here their labels are set to NA, as there appears a discrepancy between their cluster assignment and visual clustering in the 2D UMAP space. Taking these few out due to the uncertainty, we don’t want to include any cells of other cell types further down.
plot_data = as.data.frame(seu_integrated@reductions$umap@cell.embeddings)
plot_data$celltype = seu_integrated@meta.data$celltype
plot_data = plot_data[plot_data$celltype=="keratinocyte",]
plot_data$selection = plot_data$UMAP_1 > 5 | plot_data$UMAP_2 > 5
plot_data$selection_fctr = factor(plot_data$selection, levels=c(TRUE, FALSE))
mycolours = c("red", "grey")
names(mycolours) = c(TRUE, FALSE)
p = ggplot(plot_data) + aes_string(x="UMAP_1", y="UMAP_2", colour="selection_fctr") +
geom_point(size=0.25) + scale_colour_manual(values=mycolours) +
theme_cowplot()
print(p)
seu_integrated@meta.data$celltype[rownames(seu_integrated@meta.data) %in% rownames(plot_data)[plot_data$selection]] = NA
UMAPPlot(seu_integrated, group.by="Phase")
plot_data = as.data.frame(seu_integrated@reductions$umap@cell.embeddings)
plot_data$celltype = seu_integrated@meta.data$celltype
plot_data$celltype = unlist(lapply(plot_data$celltype, function(x) { paste(toupper(substr(x, 1, 1)), substr(x, 2, nchar(x)), sep="") }))
plot_data$celltype_fctr = factor(plot_data$celltype, levels=c("Keratinocyte", "Fibroblast", "Immune", "Endothelial"))
mycolours = brewer.pal(4, "Dark2")
p = ggplot(plot_data) +
aes(x=UMAP_1, y=UMAP_2, colour=celltype_fctr) +
geom_point(size=0.2) +
scale_colour_manual(values=mycolours, na.value="grey") +
xlab("UMAP 1") + ylab("UMAP 2")
p = theme_publication_plot(p, "Cell type")
print(p)
cowplot::save_plot(plot=p,
filename = paste0(fileprefix, "figure5d.pdf"),
base_height=4,
base_width=6)
Plot Notch1 expression per library
notch1_expr = data.frame(expression=tpm["Notch1", rownames(seu_integrated@meta.data)], library=seu_integrated@meta.data$library)
notch1_expr$library = factor(notch1_expr$library, levels=c("WT 1", "WT 2", "HOM KO 1", "HOM KO 2"))
p = ggplot(notch1_expr) + aes(x=library, y=expression, fill=library) + geom_boxplot() +
theme_cowplot() + xlab("Library") + ylab("TPM")
p = theme_publication_plot(p, "Library", legend_aes = 1)
print(p)
cowplot::save_plot(plot=p,
filename = paste0(fileprefix, "supplfig_notch1_expression_all_cells.pdf"),
base_height=4,
base_width=6)
save(file=paste0(fileprefix, "notch1_batch_effect_with_filter_with_adjustment.RData"), seu_integrated, markers_all_cells)
metdata = seu_integrated@meta.data
metdata$cell = rownames(metdata)
write.table(metdata, file=paste0(fileprefix, "notch1_celltypes_and_metadata.txt"), quote=F, sep="\t", row.names=F)
celltype_count = seu_integrated@meta.data[, c("orig.ident", "celltype")] %>% group_by(orig.ident, celltype) %>% summarise(n=n()) %>% spread(celltype, n)
`summarise()` regrouping output by 'orig.ident' (override with `.groups` argument)
celltype_frac = celltype_count[, 2:ncol(celltype_count)] / rowSums(celltype_count[, 2:ncol(celltype_count)], na.rm=T)
celltype_frac = cbind(data.frame(library=c("WT 1", "WT 2", "HOM KO 1", "HOM KO 2")), celltype_frac)
celltype_frac
write.table(celltype_frac, file=paste0(fileprefix, "notch1_cell_type_counts_per_library.txt"), quote=F, sep="\t", row.names=F)
celltype_frac$library = factor(celltype_frac$library, levels=c("WT 1", "WT 2", "HOM KO 1", "HOM KO 2"))
cell_type_frac_wider = celltype_frac %>% pivot_longer(!library)
cell_type_frac_wider$name = unlist(lapply(cell_type_frac_wider$name, function(x) { paste(toupper(substr(x, 1, 1)), substr(x, 2, nchar(x)), sep="") }))
cell_type_frac_wider$name = factor(cell_type_frac_wider$name, levels=levels(plot_dat$celltype_fctr))
p = ggplot(cell_type_frac_wider) +
aes(x=library, y=value, fill=name) +
geom_bar(position="dodge", stat="identity") +
scale_fill_manual(values=mycolours, na.value="grey") +
xlab("Library") + ylab("Proportion of cells per library")
p = theme_publication_plot(p, "Cell type")
print(p)
cowplot::save_plot(plot=p,
filename = paste0(fileprefix, "figure5e_alt1.pdf"),
base_height=4,
base_width=6)
p = ggplot(cell_type_frac_wider) +
aes(x=library, y=value, fill=name) +
geom_bar(stat="identity", position = position_fill(reverse = TRUE)) +
scale_fill_manual(values=mycolours, na.value="grey") +
xlab("Library") + ylab("Proportion of cells per library")
p = theme_publication_plot(p, "Cell type")
print(p)
cowplot::save_plot(plot=p,
filename = paste0(fileprefix, "figure5e_alt2.pdf"),
base_height=4,
base_width=6)
Rerun integration because we are no longer now including cell cycle adjustment
keratinocyte_barcodes = rownames(seu_integrated@meta.data)[seu_integrated@meta.data$celltype=="keratinocyte" & !is.na(seu_integrated@meta.data$celltype)]
# Initial run found these cells as an outlier cluster and expressing fibroblast markers. Here these are also removed
misidentified_fibroblasts = read.table("20201211_fibroblast_identified_as_keratinocyte.txt", header=T, stringsAsFactors=F)
keratinocyte_barcodes = keratinocyte_barcodes[keratinocyte_barcodes %in% misidentified_fibroblasts$cell[!misidentified_fibroblasts$fibroblast_identified_as_keratinocyte]]
outlier_cells = read.table("20201211_notch1_cluster_assignments_to_remove_outlier_cells.txt", header=T, stringsAsFactors=F)
outlier_cells = outlier_cells[outlier_cells$is_outlier,]
keratinocyte_barcodes = keratinocyte_barcodes[!keratinocyte_barcodes %in% outlier_cells$cell]
seu_kera = list()
for (i in 1:length(expr_matrices)) {
seu_kera[[i]] = CreateSeuratObject(counts = expr_matrices[[i]][, colnames(expr_matrices[[i]]) %in% keratinocyte_barcodes], min.cells = 30, min.features = 3000)
mito.genes <- rownames(GetAssayData(object=seu_kera[[i]]))[grepl(pattern = "^MT-", x = toupper(rownames(GetAssayData(object=seu_kera[[i]]))))]
percent.mito <- Matrix::colSums(GetAssayData(object=seu_kera[[i]], slot="counts")[mito.genes, ]) / Matrix::colSums(GetAssayData(object=seu_kera[[i]], slot="counts"))
seu_kera[[i]] <- AddMetaData(object = seu_kera[[i]], metadata = percent.mito, col.name = "prop.mt")
# Restrict proportion MT expression to remove unhappy cells
seu_kera[[i]] = subset(x = seu_kera[[i]], subset = prop.mt > 0.03 & prop.mt < 0.1)
# Remove potential doublets
seu[[i]] = subset(x = seu[[i]], subset = nFeature_RNA < 6500 & nCount_RNA < 55000)
seu_kera[[i]] = SCTransform(seu_kera[[i]], verbose = FALSE, vars.to.regress=c("prop.mt", "nFeature_RNA", "nCount_RNA"))
seu_kera[[i]] = CellCycleScoring(seu_kera[[i]], s.features = cc.genes$s.genes, g2m.features = cc.genes$g2m.genes, assay = 'SCT', set.ident = TRUE)
# Enable this when also adjusting for cell cycle
# seu[[i]] = SCTransform(seu[[i]], verbose = FALSE, vars.to.regress=c("prop.mt", "nFeature_RNA", "nCount_RNA", "S.Score", "G2M.Score"))
seu[[i]] = AddMetaData(object=seu[[i]], metadata=rep(library_names[i], length(seu[[i]]$nCount_RNA)), col.name="library")
}
Rerun the integration with the newly adjusted values
# This is required for PrepSCTIntegration, an increase from the R default 512 to 5120 for this dataset
options(future.globals.maxSize=5120*1024^2)
seu_features = SelectIntegrationFeatures(object.list = seu_kera, nfeatures = 3000)
seu_kera = PrepSCTIntegration(object.list = seu_kera, anchor.features = seu_features, verbose = FALSE)
seu_kera = lapply(X = seu_kera, FUN = RunPCA, verbose = FALSE, features = seu_features)
seu_anchors = FindIntegrationAnchors(object.list = seu_kera, normalization.method = "SCT", anchor.features = seu_features, verbose = FALSE, reduction = "rpca")
seu_kera_integrated = IntegrateData(anchorset = seu_anchors, normalization.method = "SCT", verbose = FALSE)
library_per_cell = factor(unlist(lapply(seu, function(x) x@meta.data$library)), levels=library_names)
names(library_per_cell) = unlist(lapply(seu, function(x) rownames(x@meta.data)))
seu_kera_integrated = AddMetaData(object=seu_kera_integrated, metadata=library_per_cell, col.name="library")
seu_kera_integrated = RunPCA(seu_kera_integrated, verbose = FALSE)
VlnPlot(seu_kera_integrated, group.by="orig.ident", features = c("nFeature_RNA", "nCount_RNA", "prop.mt"), ncol = 3, pt.size=0.01)
ElbowPlot(seu_kera_integrated, ndims=50)
Here we do take the point where the additional variation tail in the above plot becomes flat. Picked 10 PCs.
seu_kera_integrated = RunUMAP(seu_kera_integrated, dims = 1:10)
13:01:04 UMAP embedding parameters a = 0.9922 b = 1.112
13:01:04 Read 8807 rows and found 10 numeric columns
13:01:04 Using Annoy for neighbor search, n_neighbors = 30
13:01:04 Building Annoy index with metric = cosine, n_trees = 50
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
13:01:07 Writing NN index file to temp file /var/folders/bz/p63rv1754pv9v6rz33xgmy8sgyrrg3/T//RtmpsxGViF/fileb6717dd80e2
13:01:07 Searching Annoy index using 1 thread, search_k = 3000
13:01:09 Annoy recall = 100%
13:01:11 Commencing smooth kNN distance calibration using 1 thread
13:01:14 Initializing from normalized Laplacian + noise
13:01:14 Commencing optimization for 500 epochs, with 362192 positive edges
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
13:01:27 Optimization finished
UMAPPlot(seu_kera_integrated)
p = UMAPPlot(seu_kera_integrated,group.by="library")
p = theme_publication_plot(p, "Library")
print(p)
cowplot::save_plot(plot=p,
filename = paste0(fileprefix, "supplfig_umap_keratinocytes_overlay_library.pdf"),
base_height=4,
base_width=6)
UMAPPlot(seu_kera_integrated,group.by="Phase")
plot_dat = as.data.frame(seu_kera_integrated@reductions[["umap"]]@cell.embeddings)
plot_dat$krt14 = log(tpm["Krt14", rownames(seu_kera_integrated@meta.data)])
plot_dat$tgm3 = log(tpm["Tgm3", rownames(seu_kera_integrated@meta.data)])
plot_dat$krt4 = log(tpm["Krt4", rownames(seu_kera_integrated@meta.data)])
plot_dat$lor = log(tpm["Lor", rownames(seu_kera_integrated@meta.data)])
p1 = theme_publication_plot(ggplot(plot_dat) + aes(x=UMAP_1, y=UMAP_2, colour=krt14) + geom_point(size=0.2) + xlab("UMAP 1") + ylab("UMAP 2") + theme_cowplot() + ggtitle("Krt14"), legend_title = "log(TPM)")
p2 = theme_publication_plot(ggplot(plot_dat) + aes(x=UMAP_1, y=UMAP_2, colour=tgm3) + geom_point(size=0.2) + xlab("UMAP 1") + ylab("UMAP 2") + theme_cowplot() + ggtitle("Tgm3"), legend_title = "log(TPM)")
p3 = theme_publication_plot(ggplot(plot_dat) + aes(x=UMAP_1, y=UMAP_2, colour=krt4) + geom_point(size=0.2) + xlab("UMAP 1") + ylab("UMAP 2") + theme_cowplot() + ggtitle("Krt4"), legend_title = "log(TPM)")
p4 = theme_publication_plot(ggplot(plot_dat) + aes(x=UMAP_1, y=UMAP_2, colour=lor) + geom_point(size=0.2) + xlab("UMAP 1") + ylab("UMAP 2") + theme_cowplot() + ggtitle("Lor"), legend_title = "log(TPM)")
p = plot_grid(p1, p2, p3, p4, ncol=2)
print(p)
cowplot::save_plot(plot=p,
filename = paste0(fileprefix, "supplfig_keratinocytes_marker_expression.pdf"),
base_height=4,
base_width=6)
VlnPlot(seu_kera_integrated, group.by="orig.ident", features = "prop.mt", ncol = 1, pt.size=0.01)
FeaturePlot(seu_kera_integrated, features=c("nFeature_RNA", "nCount_RNA", "prop.mt"))
plot_dat = as.data.frame(seu_kera_integrated@reductions$umap@cell.embeddings)
plot_dat$phase = seu_kera_integrated@meta.data$Phase
plot_dat$phase = factor(plot_dat$phase, levels=c("G1", "G2M", "S"))
# mycolours = brewer.pal(4, "Dark2")
p = ggplot(plot_dat) +
aes(x=UMAP_1, y=UMAP_2, colour=phase) +
geom_point(size=0.2) +
xlab("UMAP 1") + ylab("UMAP 2")
p = theme_publication_plot(p, "Cell type")
print(p)
cowplot::save_plot(plot=p,
filename = paste0(fileprefix, "figure5h.pdf"),
base_height=4,
base_width=6)
phase_count = seu_kera_integrated@meta.data %>% group_by(library, Phase) %>% summarise(n=n()) %>% mutate(frac = n/sum(n))
`summarise()` regrouping output by 'library' (override with `.groups` argument)
phase_count$library = factor(phase_count$library, levels=c("WT 1", "WT 2", "HOM KO 1", "HOM KO 2"))
p = ggplot(phase_count) +
aes(x=library, y=frac, fill=Phase) +
geom_bar(position="dodge", stat="identity") +
xlab("Library") + ylab("Proportion of cells per library")
p = theme_publication_plot(p, "Cycle phase")
print(p)
cowplot::save_plot(plot=p,
filename = paste0(fileprefix, "figure5i_alt1.pdf"),
base_height=4,
base_width=6)
p = ggplot(phase_count) +
aes(x=library, y=frac, fill=Phase) +
geom_bar(stat="identity", position = position_fill(reverse = TRUE)) +
xlab("Library") + ylab("Proportion of cells per library")
p = theme_publication_plot(p, "Cycle phase")
print(p)
cowplot::save_plot(plot=p,
filename = paste0(fileprefix, "figure5i_alt2.pdf"),
base_height=4,
base_width=6)
write.table(phase_count[, c("library", "Phase", "frac")] %>% pivot_wider(names_from=Phase, values_from=frac),
file=paste0(fileprefix, "frac_cells_library_cellcycle.txt"), quote=F, sep="\t", row.names=F)
# get tpm for marker genes per cluster
seu_kera_integrated = FindNeighbors(seu_kera_integrated, dims = 1:10)
Computing nearest neighbor graph
Computing SNN
seu_kera_integrated = FindClusters(object = seu_kera_integrated, resolution = 0.6, verbose=FALSE)
markers_keratinocytes = FindAllMarkers(seu_kera_integrated, only.pos = TRUE, min.pct = 0.1, thresh.use = 0.25, verbose = F)
plot <- DimPlot(object = seu_kera_integrated, reduction = "umap")
LabelClusters(plot = plot, id = 'ident', size=6)
write.table(markers_keratinocytes, file=paste0(fileprefix, "signif_expr_markers_keratinocyte_analysis.txt"), quote=F, sep="\t", row.names=F)
The ‘circle’ is formed of basal cells, the tail of suprabasal. Here we make the observation that known marker genes broadly define the cutoff point, which we approximate with the right below defined slope/intercept of a line. What we really want to know is whether libraries are equally represented on both sides of the line. We’re less concerned with whether this is the optimal separation of basal and suprabasal cells, especially given it seems likely differentiated cells are less likely to make it through the scRNAseq protocols due to their characteristics.
# established to broadly capture the changeover point where Krt14 drops of and Tgm3 starts going up
slope = 1.5
intercept = 5.5
add_threshold = function(p, slope, intercept) {
return(p + geom_abline(slope=slope, intercept=intercept, colour="black", linetype=2))
}
pc1 = as.data.frame(seu_kera_integrated@reductions[["umap"]]@cell.embeddings[, c(1, 2), drop=F])
pc1$library = as.character(seu_kera_integrated@meta.data$library[match(rownames(pc1), rownames(seu_kera_integrated@meta.data))])
pc1$selected = ifelse(pc1$UMAP_2 > (intercept+slope*pc1$UMAP_1), 0, 1)
pc1$selected = factor(pc1$selected, levels=c(1, 0))
pc1$layer = "Basal"
pc1$layer[pc1$selected=="0"] = "Suprabasal"
pc1$layer = factor(pc1$layer, levels=c("Basal", "Suprabasal"))
Plot the expression of a number of markers relative to the selected threshold to show we’ve broadly captured the crossover area
plot_dat = as.data.frame(seu_kera_integrated@reductions[["umap"]]@cell.embeddings)
plot_dat$krt14 = log(tpm["Krt14", rownames(seu_kera_integrated@meta.data)])
plot_dat$tgm3 = log(tpm["Tgm3", rownames(seu_kera_integrated@meta.data)])
plot_dat$krt4 = log(tpm["Krt4", rownames(seu_kera_integrated@meta.data)])
plot_dat$lor = log(tpm["Lor", rownames(seu_kera_integrated@meta.data)])
p1 = theme_publication_plot(ggplot(plot_dat) + aes(x=UMAP_1, y=UMAP_2, colour=krt14) + geom_point(size=0.2) + xlab("UMAP 1") + ylab("UMAP 2") + theme_cowplot() + ggtitle("Krt14"), legend_title = "log(TPM)")
p2 = theme_publication_plot(ggplot(plot_dat) + aes(x=UMAP_1, y=UMAP_2, colour=tgm3) + geom_point(size=0.2) + xlab("UMAP 1") + ylab("UMAP 2") + theme_cowplot() + ggtitle("Tgm3"), legend_title = "log(TPM)")
p3 = theme_publication_plot(ggplot(plot_dat) + aes(x=UMAP_1, y=UMAP_2, colour=krt4) + geom_point(size=0.2) + xlab("UMAP 1") + ylab("UMAP 2") + theme_cowplot() + ggtitle("Krt4"), legend_title = "log(TPM)")
p4 = theme_publication_plot(ggplot(plot_dat) + aes(x=UMAP_1, y=UMAP_2, colour=lor) + geom_point(size=0.2) + xlab("UMAP 1") + ylab("UMAP 2") + theme_cowplot() + ggtitle("Lor"), legend_title = "log(TPM)")
p = plot_grid(add_threshold(p1, slope, intercept),
add_threshold(p2, slope, intercept),
add_threshold(p3, slope, intercept),
add_threshold(p4, slope, intercept), ncol=2)
print(p)
cowplot::save_plot(plot=p,
filename = paste0(fileprefix, "supplfig_basal_cell_threshold_vs_markers.pdf"),
base_height=4,
base_width=6)
Plot the threshold per library to show which cells have been marked as basal cells
mycolours = c("red", "grey")
p1 = ggplot(pc1[pc1$library=="WT 1",]) +
aes(x=UMAP_1, y=UMAP_2, colour=layer) +
geom_point(size=0.2) +
scale_colour_manual(values=mycolours, na.value="grey") +
xlab("UMAP 1") + ylab("UMAP 2") + ggtitle("WT 1") + theme_cowplot() + theme(legend.position="none")
p2 = ggplot(pc1[pc1$library=="WT 2",]) +
aes(x=UMAP_1, y=UMAP_2, colour=layer) +
geom_point(size=0.2) +
scale_colour_manual(values=mycolours, na.value="grey") +
xlab("UMAP 1") + ylab("UMAP 2") + ggtitle("WT 2") + theme_cowplot() + theme(legend.position="none")
p3 = ggplot(pc1[pc1$library=="HOM KO 1",]) +
aes(x=UMAP_1, y=UMAP_2, colour=layer) +
geom_point(size=0.2) +
scale_colour_manual(values=mycolours, na.value="grey") +
xlab("UMAP 1") + ylab("UMAP 2") + ggtitle("HOM KO 1") + theme_cowplot() + theme(legend.position="none")
p4 = ggplot(pc1[pc1$library=="HOM KO 2",]) +
aes(x=UMAP_1, y=UMAP_2, colour=layer) +
geom_point(size=0.2) +
scale_colour_manual(values=mycolours, na.value="grey") +
xlab("UMAP 1") + ylab("UMAP 2") + ggtitle("HOM KO 2") + theme_cowplot() + theme(legend.position="none")
p = plot_grid(add_threshold(p1, slope, intercept),
add_threshold(p2, slope, intercept),
add_threshold(p3, slope, intercept),
add_threshold(p4, slope, intercept))
print(p)
cowplot::save_plot(plot=p,
filename = paste0(fileprefix, "supplfig_basal_cell_classification_map.pdf"),
base_height=4,
base_width=6)
Summarise the calls into a table
basal_cell_count = pc1 %>% group_by(selected, library) %>% summarise(n=n()) %>% pivot_wider(names_from=selected, values_from=n)
`summarise()` regrouping output by 'selected' (override with `.groups` argument)
colnames(basal_cell_count)[2:3] = c("num_basal", "num_suprabasal")
basal_cell_count[, c("frac_basal", "frac_suprabasal")] = basal_cell_count[, 2:3] / rowSums(basal_cell_count[, 2:3])
print(basal_cell_count)
write.table(basal_cell_count, file=paste0(fileprefix, "notch1_fraction_basal_cells_umap_threshold.txt"), quote=F, sep="\t", row.names=F)
Make the summary figures for the paper
mycolours = c("red", "grey")
p = ggplot(pc1) +
aes(x=UMAP_1, y=UMAP_2, colour=layer) +
geom_point(size=0.2) +
scale_colour_manual(values=mycolours, na.value="grey") +
xlab("UMAP 1") + ylab("UMAP 2")
p = theme_publication_plot(p, "Layer")
print(p)
cowplot::save_plot(plot=p,
filename = paste0(fileprefix, "figure5f.pdf"),
base_height=4,
base_width=6)
basal_cell_count = basal_cell_count[,c("library", "frac_basal", "frac_suprabasal")]
basal_cell_count$library = factor(basal_cell_count$library, levels=c("WT 1", "WT 2", "HOM KO 1", "HOM KO 2"))
colnames(basal_cell_count)[2:3] = c("Basal", "Suprabasal")
p = ggplot(basal_cell_count %>% pivot_longer(!library)) +
aes(x=library, y=value, fill=name) +
geom_bar(position="dodge", stat="identity") +
scale_fill_manual(values=mycolours, na.value="grey") +
xlab("Library") + ylab("Proportion of cells per library")
p = theme_publication_plot(p, "Layer")
print(p)
cowplot::save_plot(plot=p,
filename = paste0(fileprefix, "figure5g_alt1.pdf"),
base_height=4,
base_width=6)
p = ggplot(basal_cell_count %>% pivot_longer(!library)) +
aes(x=library, y=value, fill=name) +
geom_bar(stat="identity", position = position_fill(reverse = TRUE)) +
scale_fill_manual(values=mycolours, na.value="grey") +
xlab("Library") + ylab("Proportion of cells per library")
p = theme_publication_plot(p, "Layer")
print(p)
cowplot::save_plot(plot=p,
filename = paste0(fileprefix, "figure5g_alt2.pdf"),
base_height=4,
base_width=6)
Plot Notch1 expression per library
notch1_expr = data.frame(expression=tpm["Notch1", rownames(seu_kera_integrated@meta.data)], library=seu_kera_integrated@meta.data$library)
notch1_expr$library = factor(notch1_expr$library, levels=c("WT 1", "WT 2", "HOM KO 1", "HOM KO 2"))
p = ggplot(notch1_expr) + aes(x=library, y=expression, fill=library) + geom_boxplot() +
theme_cowplot() + xlab("Library") + ylab("TPM")
p = theme_publication_plot(p, "Library", legend_aes=1)
print(p)
cowplot::save_plot(plot=p,
filename = paste0(fileprefix, "supplfig_notch1_expression_only_keratinocytes.pdf"),
base_height=4,
base_width=6)
Downsampled plot to show an equal number of cells per library - 1000 cells
counts_per_library = table(seu_integrated@meta.data$library)
set.seed(123)
selected_cells = rownames(seu_integrated@meta.data)[seu_integrated@meta.data$library=="WT 1"][sample(1:counts_per_library["WT 1"], 1000)]
selected_cells = c(selected_cells, rownames(seu_integrated@meta.data)[seu_integrated@meta.data$library=="WT 2"][sample(1:counts_per_library["WT 2"], 1000)])
selected_cells = c(selected_cells, rownames(seu_integrated@meta.data)[seu_integrated@meta.data$library=="HOM KO 1"][sample(1:counts_per_library["HOM KO 1"], 1000)])
selected_cells = c(selected_cells, rownames(seu_integrated@meta.data)[seu_integrated@meta.data$library=="HOM KO 2"][sample(1:counts_per_library["HOM KO 2"], 1000)])
seu_integrated_temp = subset(seu_integrated, cells=selected_cells)
p = UMAPPlot(seu_integrated_temp,group.by="library")
p = theme_publication_plot(p, "Library")
print(p)
cowplot::save_plot(plot=p,
filename = "supplfig_umap_overlay_library_downsampled1000.pdf",
base_height=4,
base_width=6)
# plot_data = as.data.frame(seu_integrated_temp@reductions[["umap"]]@cell.embeddings)
# plot_data$celltype = seu_integrated_temp@meta.data$celltype
# plot_data$celltype = unlist(lapply(plot_data$celltype, function(x) { paste(toupper(substr(x, 1, 1)), substr(x, 2, nchar(x)), sep="") }))
# plot_data$celltype_fctr = factor(plot_data$celltype, levels=c("Keratinocyte", "Fibroblast", "Immune", "Endothelial"))
# mycolours = brewer.pal(4, "Dark2")
# p = ggplot(plot_data) +
# aes(x=UMAP_1, y=UMAP_2, colour=celltype_fctr) +
# geom_point(size=0.2) +
# scale_colour_manual(values=mycolours, na.value="grey") +
# xlab("UMAP 1") + ylab("UMAP 2")
# p = theme_publication_plot(p, "Cell type")
# print(p)
#
# cowplot::save_plot(plot=p,
# filename = "figure5d_downsampled1000.pdf",
# base_height=4,
# base_width=6)
Downsampled plot to show an equal number of cells per library - 1500 cells
counts_per_library = table(seu_integrated@meta.data$library)
set.seed(123)
selected_cells = rownames(seu_integrated@meta.data)[seu_integrated@meta.data$library=="WT 1"][sample(1:counts_per_library["WT 1"], 1500)]
selected_cells = c(selected_cells, rownames(seu_integrated@meta.data)[seu_integrated@meta.data$library=="WT 2"][sample(1:counts_per_library["WT 2"], 1500)])
selected_cells = c(selected_cells, rownames(seu_integrated@meta.data)[seu_integrated@meta.data$library=="HOM KO 1"][sample(1:counts_per_library["HOM KO 1"], 1500)])
selected_cells = c(selected_cells, rownames(seu_integrated@meta.data)[seu_integrated@meta.data$library=="HOM KO 2"][sample(1:counts_per_library["HOM KO 2"], 1500)])
seu_integrated_temp = subset(seu_integrated, cells=selected_cells)
p = UMAPPlot(seu_integrated_temp,group.by="library")
p = theme_publication_plot(p, "Library")
print(p)
cowplot::save_plot(plot=p,
filename = "supplfig_umap_overlay_library_downsampled1500.pdf",
base_height=4,
base_width=6)
# plot_data = as.data.frame(seu_integrated_temp@reductions[["umap"]]@cell.embeddings)
# plot_data$celltype = seu_integrated_temp@meta.data$celltype
# plot_data$celltype = unlist(lapply(plot_data$celltype, function(x) { paste(toupper(substr(x, 1, 1)), substr(x, 2, nchar(x)), sep="") }))
# plot_data$celltype_fctr = factor(plot_data$celltype, levels=c("Keratinocyte", "Fibroblast", "Immune", "Endothelial"))
# mycolours = brewer.pal(4, "Dark2")
# p = ggplot(plot_data) +
# aes(x=UMAP_1, y=UMAP_2, colour=celltype_fctr) +
# geom_point(size=0.2) +
# scale_colour_manual(values=mycolours, na.value="grey") +
# xlab("UMAP 1") + ylab("UMAP 2")
# p = theme_publication_plot(p, "Cell type")
# print(p)
#
# cowplot::save_plot(plot=p,
# filename = "figure5d_downsampled1500.pdf",
# base_height=4,
# base_width=6)
Save the outcome
# save the Krt4 estimates
metadat = seu_kera_integrated@meta.data
metadat$cell = rownames(metadat)
metadat = metadat[, c("cell", "orig.ident", "Phase", "prop.mt", "nCount_RNA", "nFeature_RNA")]
colnames(metadat)[2] = "library"
metadat$is_basal = pc1$layer=="Basal"
write.table(metadat, file=paste0(fileprefix, "notch1_basal_suprabasal_calls_umap_threshold.txt"), quote=F, sep="\t", row.names=F)
sessionInfo()
R version 4.0.4 (2021-02-15)
Platform: x86_64-apple-darwin17.0 (64-bit)
Running under: macOS Catalina 10.15.7
Matrix products: default
BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
locale:
[1] en_GB.UTF-8/en_GB.UTF-8/en_GB.UTF-8/C/en_GB.UTF-8/en_GB.UTF-8
attached base packages:
[1] parallel stats4 stats graphics grDevices utils datasets methods base
other attached packages:
[1] SoupX_1.4.8 tidyr_1.1.1 dplyr_1.0.0 cowplot_1.0.0 scater_1.16.2
[6] ggplot2_3.3.2 SingleCellExperiment_1.10.1 SummarizedExperiment_1.18.2 DelayedArray_0.14.1 matrixStats_0.56.0
[11] Biobase_2.48.0 GenomicRanges_1.40.0 GenomeInfoDb_1.24.2 IRanges_2.22.2 S4Vectors_0.26.1
[16] BiocGenerics_0.34.0 Seurat_3.2.0 RColorBrewer_1.1-2 extrafont_0.17
loaded via a namespace (and not attached):
[1] ggbeeswarm_0.6.0 Rtsne_0.15 colorspace_1.4-1 deldir_0.1-28 ellipsis_0.3.1
[6] ggridges_0.5.2 XVector_0.28.0 BiocNeighbors_1.6.0 base64enc_0.1-3 rstudioapi_0.11
[11] spatstat.data_1.4-3 leiden_0.3.3 listenv_0.8.0 ggrepel_0.8.2 codetools_0.2-18
[16] splines_4.0.4 knitr_1.29 polyclip_1.10-0 jsonlite_1.7.0 Rttf2pt1_1.3.8
[21] ica_1.0-2 cluster_2.1.0 png_0.1-7 uwot_0.1.8 shiny_1.5.0
[26] sctransform_0.2.1 compiler_4.0.4 httr_1.4.2 Matrix_1.3-2 fastmap_1.0.1
[31] lazyeval_0.2.2 BiocSingular_1.4.0 later_1.1.0.1 htmltools_0.5.1.1 tools_4.0.4
[36] rsvd_1.0.3 igraph_1.2.6 gtable_0.3.0 glue_1.4.1 GenomeInfoDbData_1.2.3
[41] RANN_2.6.1 reshape2_1.4.4 rappdirs_0.3.1 Rcpp_1.0.5 spatstat_1.64-1
[46] vctrs_0.3.2 ape_5.4 nlme_3.1-152 extrafontdb_1.0 DelayedMatrixStats_1.10.1
[51] lmtest_0.9-37 xfun_0.16 stringr_1.4.0 globals_0.14.0 mime_0.9
[56] miniUI_0.1.1.1 lifecycle_0.2.0 irlba_2.3.3 goftest_1.2-2 future_1.21.0
[61] zlibbioc_1.34.0 MASS_7.3-53 zoo_1.8-8 scales_1.1.1 promises_1.1.1
[66] spatstat.utils_1.17-0 yaml_2.2.1 reticulate_1.16 pbapply_1.4-2 gridExtra_2.3
[71] rpart_4.1-15 stringi_1.4.6 BiocParallel_1.22.0 rlang_0.4.7 pkgconfig_2.0.3
[76] bitops_1.0-6 evaluate_0.14 lattice_0.20-41 ROCR_1.0-11 purrr_0.3.4
[81] tensor_1.5 patchwork_1.0.1 htmlwidgets_1.5.1 tidyselect_1.1.0 parallelly_1.23.0
[86] RcppAnnoy_0.0.16 plyr_1.8.6 magrittr_2.0.1 R6_2.4.1 generics_0.0.2
[91] withr_2.2.0 pillar_1.4.6 mgcv_1.8-33 fitdistrplus_1.1-1 survival_3.2-7
[96] abind_1.4-5 RCurl_1.98-1.2 tibble_3.0.3 future.apply_1.6.0 crayon_1.3.4
[101] KernSmooth_2.23-18 plotly_4.9.2.1 rmarkdown_2.3 viridis_0.5.1 grid_4.0.4
[106] data.table_1.13.0 digest_0.6.25 xtable_1.8-4 httpuv_1.5.4 munsell_0.5.0
[111] beeswarm_0.2.3 viridisLite_0.3.0 vipor_0.4.5